{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "04d4165c-fab2-4f54-9b50-11d53917d785",
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Install the SDKs and helper packages used by the cells below.\n",
    "# %pip (unlike !pip) installs into the environment of the running kernel,\n",
    "# so the freshly installed packages are the ones the imports will find.\n",
    "%pip install dashvector dashscope\n",
    "%pip install transformers_stream_generator python-dotenv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "0b53db17-7d6d-4192-a145-e470d6d2a6ec",
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2023-08-08T05:50:04.277160Z",
     "iopub.status.busy": "2023-08-08T05:50:04.276831Z",
     "iopub.status.idle": "2023-08-08T05:50:04.280156Z",
     "shell.execute_reply": "2023-08-08T05:50:04.279627Z",
     "shell.execute_reply.started": "2023-08-08T05:50:04.277142Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import dashscope\n",
    "import os\n",
    "from dotenv import load_dotenv\n",
    "from dashscope import TextEmbedding\n",
    "from dashvector import Client, Doc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "728a2bf5-905c-48ef-b70a-be53d4f8fcc0",
   "metadata": {
    "ExecutionIndicator": {
     "show": false
    },
    "execution": {
     "iopub.execute_input": "2023-08-08T05:50:08.430394Z",
     "iopub.status.busy": "2023-08-08T05:50:08.430068Z",
     "iopub.status.idle": "2023-08-08T05:50:08.500388Z",
     "shell.execute_reply": "2023-08-08T05:50:08.499873Z",
     "shell.execute_reply.started": "2023-08-08T05:50:08.430376Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Read the API key from the environment (or a local .env file) so that no\n",
    "# secret is stored in the notebook itself.\n",
    "load_dotenv()\n",
    "api_key = os.getenv('DASHSCOPE_KEY')\n",
    "if not api_key:\n",
    "    # Fail here with a clear message instead of passing None to the SDKs.\n",
    "    raise RuntimeError('DASHSCOPE_KEY is not set; export it or add it to a .env file')\n",
    "\n",
    "dashscope.api_key = api_key\n",
    "ds_client = Client(api_key=api_key)\n",
    "\n",
    "# Start from an empty collection on every run. The result of delete() is\n",
    "# intentionally not checked: presumably it reports an error when the\n",
    "# collection does not exist yet, which is fine on a first run.\n",
    "ds_client.delete('tl_embeddings')\n",
    "# 1536 is the vector dimension of the collection; it has to match the length\n",
    "# of the embeddings that are upserted later.\n",
    "rsp = ds_client.create('tl_embeddings', 1536)\n",
    "if rsp.code != 0:\n",
    "    raise RuntimeError(f'failed to create collection tl_embeddings: {rsp.code} {rsp.message}')\n",
    "collection = ds_client.get('tl_embeddings')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "d65c0f3f-a080-4803-b5ed-f4e641a96db2",
   "metadata": {
    "ExecutionIndicator": {
     "show": false
    },
    "execution": {
     "iopub.execute_input": "2023-08-08T05:50:10.112184Z",
     "iopub.status.busy": "2023-08-08T05:50:10.111841Z",
     "iopub.status.idle": "2023-08-08T05:50:10.116300Z",
     "shell.execute_reply": "2023-08-08T05:50:10.115703Z",
     "shell.execute_reply.started": "2023-08-08T05:50:10.112164Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "def prepare_data(path, size, lines_per_doc=12):\n",
    "    \"\"\"Yield batches of text chunks read from a UTF-8 text file.\n",
    "\n",
    "    Blank lines are skipped. Every ``lines_per_doc`` non-blank lines are\n",
    "    stripped and concatenated (without a separator) into one chunk, and the\n",
    "    chunks are handed out in lists of at most ``size`` items.\n",
    "\n",
    "    Args:\n",
    "        path: Path of the text file to read.\n",
    "        size: Number of chunks per yielded batch.\n",
    "        lines_per_doc: Number of non-blank lines merged into one chunk.\n",
    "\n",
    "    Yields:\n",
    "        list[str]: The next batch of chunks. The final batch may be shorter,\n",
    "        and its last chunk may hold fewer than ``lines_per_doc`` lines.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If ``lines_per_doc`` is smaller than 1.\n",
    "    \"\"\"\n",
    "    if lines_per_doc < 1:\n",
    "        raise ValueError('lines_per_doc must be at least 1')\n",
    "\n",
    "    batch_docs = []\n",
    "    pending_lines = []\n",
    "    with open(path, 'r', encoding='utf-8') as f:\n",
    "        for line in f:\n",
    "            text = line.strip()\n",
    "            if not text:\n",
    "                continue\n",
    "            pending_lines.append(text)\n",
    "            if len(pending_lines) == lines_per_doc:\n",
    "                batch_docs.append(''.join(pending_lines))\n",
    "                pending_lines = []\n",
    "                if len(batch_docs) == size:\n",
    "                    yield batch_docs\n",
    "                    batch_docs = []\n",
    "\n",
    "    # Flush the tail of the file: without this, the last lines are silently\n",
    "    # dropped whenever the line count is not a multiple of lines_per_doc.\n",
    "    if pending_lines:\n",
    "        batch_docs.append(''.join(pending_lines))\n",
    "    if batch_docs:\n",
    "        yield batch_docs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "def generate_embeddings(news):\n",
    "    \"\"\"Embed one text or a list of texts with the text_embedding_v1 model.\n",
    "\n",
    "    Args:\n",
    "        news: A single string, or a list of strings.\n",
    "\n",
    "    Returns:\n",
    "        A list of embedding vectors when ``news`` is a list, otherwise the\n",
    "        single embedding vector for the given string.\n",
    "\n",
    "    Raises:\n",
    "        RuntimeError: If the embedding service does not answer with HTTP 200.\n",
    "    \"\"\"\n",
    "    from http import HTTPStatus  # local import keeps this cell self-contained\n",
    "\n",
    "    rsp = TextEmbedding.call(model=TextEmbedding.Models.text_embedding_v1,\n",
    "                             input=news)\n",
    "    # Surface the service's own error code and message instead of failing\n",
    "    # below with an opaque error while indexing into a missing output.\n",
    "    if rsp.status_code != HTTPStatus.OK:\n",
    "        raise RuntimeError(\n",
    "            f'embedding request failed: {rsp.status_code} {rsp.code} {rsp.message}')\n",
    "    embeddings = [record['embedding'] for record in rsp.output['embeddings']]\n",
    "    return embeddings if isinstance(news, list) else embeddings[0]"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "a833b000cd7b8f0d"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5c0ba7e1-001f-4bb9-9bdb-7eb318bc3550",
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "file_location = '天龙八部.txt'\n",
    "collection = ds_client.get('tl_embeddings')\n",
    "# Number of chunks sent per embedding request.\n",
    "# NOTE(review): the embedding service presumably accepts larger batches;\n",
    "# confirm its limit before raising this value.\n",
    "batch_size = 1\n",
    "\n",
    "next_id = 0  # running document id (named so it does not shadow the built-in id())\n",
    "# Consume the generator lazily instead of materialising every batch up front.\n",
    "for news in prepare_data(file_location, batch_size):\n",
    "    ids = [next_id + offset for offset in range(len(news))]\n",
    "    next_id += len(news)\n",
    "    vectors = generate_embeddings(news)\n",
    "    # Write the batch to DashVector, which builds the index.\n",
    "    # Ids are passed as strings, the type DashVector documents for Doc.id.\n",
    "    ret = collection.upsert(\n",
    "        [\n",
    "            Doc(id=str(doc_id), vector=vector, fields={\"raw\": doc})\n",
    "            for doc_id, doc, vector in zip(ids, news, vectors)\n",
    "        ]\n",
    "    )\n",
    "    # Stop at the first failed write so that missing documents are noticed.\n",
    "    if ret.code != 0:\n",
    "        raise RuntimeError(f'upsert failed for ids {ids}: {ret.code} {ret.message}')\n",
    "    print(f'indexed {next_id} documents')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "41e54ddd-145d-49c3-ade4-4a46dc34e07b",
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2023-08-08T05:50:54.514427Z",
     "iopub.status.busy": "2023-08-08T05:50:54.514108Z",
     "iopub.status.idle": "2023-08-08T05:50:54.518027Z",
     "shell.execute_reply": "2023-08-08T05:50:54.517401Z",
     "shell.execute_reply.started": "2023-08-08T05:50:54.514407Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "def search_relevant_news(question, topk=1, client=ds_client):\n",
    "    \"\"\"Return the stored text that is most similar to ``question``.\n",
    "\n",
    "    Args:\n",
    "        question: Query text; it is embedded with generate_embeddings().\n",
    "        topk: Number of nearest documents to retrieve.\n",
    "        client: DashVector client used to open the 'tl_embeddings' collection.\n",
    "            The default is bound to the ds_client that exists when this cell\n",
    "            is executed.\n",
    "\n",
    "    Returns:\n",
    "        The 'raw' field of every matched document, concatenated in the order\n",
    "        in which the documents were returned.\n",
    "\n",
    "    Raises:\n",
    "        RuntimeError: If the vector query does not succeed.\n",
    "    \"\"\"\n",
    "    # Open the collection that holds the indexed chunks.\n",
    "    collection = client.get('tl_embeddings')\n",
    "\n",
    "    # Vector search for the topk nearest documents; only the raw text is needed.\n",
    "    rsp = collection.query(generate_embeddings(question), output_fields=['raw'],\n",
    "                           topk=topk)\n",
    "    if rsp.code != 0:\n",
    "        raise RuntimeError(f'vector query failed: {rsp.code} {rsp.message}')\n",
    "\n",
    "    return \"\".join(item.fields['raw'] for item in rsp.output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "53bed7e4-35be-4df6-8775-7d62fcdb6457",
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2023-08-08T05:50:55.881775Z",
     "iopub.status.busy": "2023-08-08T05:50:55.881413Z",
     "iopub.status.idle": "2023-08-08T05:50:55.962111Z",
     "shell.execute_reply": "2023-08-08T05:50:55.961592Z",
     "shell.execute_reply.started": "2023-08-08T05:50:55.881754Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\"code\": 0, \"message\": \"\", \"requests_id\": \"b996efdb-0030-4476-945e-53e9d760aaf7\", \"output\": {\"total_doc_count\": 1041, \"total_segment_count\": 1, \"total_index_file_count\": 2, \"total_index_file_size\": 2142208, \"total_delete_doc_count:\": 99, \"index_completeness\": 1.0, \"partitions\": {\"default\": {\"name\": \"default\", \"total_doc_count\": 1041, \"total_segment_count\": 1, \"total_index_file_count\": 2, \"total_index_file_size\": 2142208, \"total_delete_doc_count\": 99}}}}\n"
     ]
    }
   ],
   "source": [
    "# Sanity check: fetch and print the statistics of the 'tl_embeddings'\n",
    "# collection (the response lists document, segment and index-file counts).\n",
    "collection = ds_client.get('tl_embeddings')\n",
    "rsp = collection.stats()\n",
    "print(rsp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "409236b9-87d4-4df0-8ee6-486d3c0e5fb6",
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Example question (roughly: 'Who is Xuzhu's girlfriend?').\n",
    "question = '虚竹的女朋友是谁'\n",
    "# Retrieve the single closest stored chunk (topk=1); it is passed to\n",
    "# answer_question() as context further down.\n",
    "context = search_relevant_news(question, topk=1)\n",
    "print(context)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "730abebb-1f5a-4fb9-b035-fb2ae09a31c9",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from modelscope import AutoModelForCausalLM, AutoTokenizer\n",
    "from modelscope import GenerationConfig\n",
    "\n",
    "# Single source of truth for the checkpoint, so that tokenizer, weights and\n",
    "# generation config always refer to the same model id and revision.\n",
    "MODEL_ID = \"qwen/Qwen-7B-Chat\"\n",
    "MODEL_REVISION = 'v1.0.1'\n",
    "\n",
    "# NOTE(review): trust_remote_code=True presumably executes code shipped with\n",
    "# the checkpoint; keep the revision pinned so that this code cannot change.\n",
    "tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, revision=MODEL_REVISION, trust_remote_code=True)\n",
    "model = AutoModelForCausalLM.from_pretrained(MODEL_ID, revision=MODEL_REVISION, device_map=\"auto\", trust_remote_code=True, fp16=True).eval()\n",
    "# Generation length, top_p and other generation hyperparameters can be adjusted here.\n",
    "model.generation_config = GenerationConfig.from_pretrained(MODEL_ID, revision=MODEL_REVISION, trust_remote_code=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "2f5a1bcb-e83a-44d3-bbe4-f97437782a3b",
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2023-08-08T03:14:59.673673Z",
     "iopub.status.busy": "2023-08-08T03:14:59.673186Z",
     "iopub.status.idle": "2023-08-08T03:14:59.678682Z",
     "shell.execute_reply": "2023-08-08T03:14:59.678130Z",
     "shell.execute_reply.started": "2023-08-08T03:14:59.673651Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "def answer_question(question, context):\n",
    "    \"\"\"Ask the chat model to answer ``question`` using ``context`` as reference.\n",
    "\n",
    "    Uses the notebook-level ``model`` and ``tokenizer`` objects, and prints the\n",
    "    prompt before sending it.\n",
    "\n",
    "    Args:\n",
    "        question: The question to ask.\n",
    "        context: Reference text placed between the ``` fences of the prompt;\n",
    "            may be an empty string.\n",
    "\n",
    "    Returns:\n",
    "        The reply produced by ``model.chat``.\n",
    "    \"\"\"\n",
    "    # The prompt tells the model (in Chinese) to answer from the fenced text.\n",
    "    prompt = f'''请参考```内的内容回答问题。\n",
    "\t```\n",
    "\t{context}\n",
    "\t```\n",
    "\t我的问题是：{question}。\n",
    "    '''\n",
    "    print(prompt)  # show the exact prompt that is sent to the model\n",
    "    # No history is passed in, and the history that comes back is discarded.\n",
    "    response, _ = model.chat(tokenizer, prompt, history=None)\n",
    "    return response"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "75ac8f4a-a861-4376-9e55-ebefef9a9cd6",
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2023-08-08T02:09:21.861453Z",
     "iopub.status.busy": "2023-08-08T02:09:21.861074Z",
     "iopub.status.idle": "2023-08-08T02:09:29.870851Z",
     "shell.execute_reply": "2023-08-08T02:09:29.870216Z",
     "shell.execute_reply.started": "2023-08-08T02:09:21.861432Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "请参考```内的内容回答问题。\"\n",
      "\t```\n",
      "\t\n",
      "\t```\n",
      "\t我的问题是：天龙八部是什么意思。\n",
      "    \n",
      "question: 天龙八部是什么意思\n",
      "answer: 天龙八部是一部中国古代武侠小说，作者是金庸先生。小说讲述了宋朝时期，几个年轻人在江湖上寻找自我，历经磨练，最终成为武林中的顶尖高手的故事。天龙八部中包括了八个主要人物，分别代表了八种不同的性格和技能。小说中充满了江湖情仇、爱情、友情、勇气和智慧等元素，深受读者喜爱。\n"
     ]
    }
   ],
   "source": [
    "# Baseline run: ask the model with an empty context, i.e. without any\n",
    "# retrieved text between the prompt fences.\n",
    "answer = answer_question(question, '')\n",
    "# The two adjacent f-strings are concatenated into one printed message.\n",
    "print(f'question: {question}\\n' f'answer: {answer}')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "eca328fc-cd69-4e12-8448-f426f3314414",
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2023-08-08T03:15:00.955075Z",
     "iopub.status.busy": "2023-08-08T03:15:00.954751Z",
     "iopub.status.idle": "2023-08-08T03:15:04.072107Z",
     "shell.execute_reply": "2023-08-08T03:15:04.071373Z",
     "shell.execute_reply.started": "2023-08-08T03:15:00.955057Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "请参考```内的内容回答问题。\"\n",
      "\t```\n",
      "\t　　萧峰听西夏公主命那宫女向众人逐一询问三个相同的问题，料想其中虽有深意，但显无加害众人之心，寻思这三个问题问到自己之时，该当如何回答？念及阿朱，胸口一痛，伤心欲绝，却不愿在旁人之前泄露自己心情，当即转身出了石堂。其时室门早开，他出去时脚步轻盈，旁人大都并未知觉。\n",
      "　　那宫女道：“却不知萧大侠因何退去？是怪我们此举无礼么？”虚竹道：“我大哥并不是小气之人，不会因此见怪。嗯，他定是酒瘾发作，到外面喝酒去了。”那宫女笑道：“正是。素闻萧大侠豪饮，酒量天下无双，我们这里没有备酒，难留嘉宾，实在太过慢客。这位先生见到萧大侠之时，还请转告敝邦公主殿下的歉意。”这宫女能说会道，言语得体，比之在外厢款客的那个怕羞宫女口齿伶俐百倍。虚竹道：“我见到大哥时，跟他说便了。”\n",
      "　　那宫女道：“先生尊姓大名？”虚竹道：“我么……我么……我道号虚竹子。我是……出……出……那个……决不是来求亲的，不过陪着我三弟来而已。”\n",
      "　　那宫女问道：“先生平生在甚么地方最是快乐？”\n",
      "　　虚竹轻叹一声，说道：“在一个黑暗的冰窖之中。”\n",
      "　　忽听得一个女子声音“啊”的一声低呼，跟着呛啷一声响，一只瓷杯掉到地下，打得粉碎。\n",
      "　　那宫女又问：“先生生平最爱之人，叫甚么名字？”\n",
      "　　虚竹道：“唉！我……我不知道那位姑娘叫甚么名字。”\n",
      "　　众人都哈哈大笑起来，均想此人是个大傻瓜，不知对方姓名，便倾心相爱。\n",
      "　　那宫女道：“不知那位姑娘的姓名，那也不是奇事。当年孝子董永见到天上仙女下凡，并不知她的姓名底细，就爱上了她。虚竹子先生，这位姑娘的容貌定然是美丽非凡了？”\n",
      "　　虚竹道：“她容貌如何，我也是从来没看见过。”\n",
      "　　霎时之间，石室中笑声雷动，都觉真是天下奇闻，也有人以为虚竹是故意说笑。\n",
      "　　众人哄笑声中，忽听得一个女子声音低低问道：“你……你可是‘梦郎’么？”虚竹大吃一惊，颤声道：“你……你……你可是‘梦姑’么？这可想死我了。”不由自主的向前跨了几步，只闻到一阵馨香，一只温软柔滑的手掌已握住了他手，一个熟悉的声音在他耳边悄声道：“梦郎，我便是找你不到，这才请父皇贴下榜文，邀你到来。”虚竹更是惊讶，道：“你……你便是……”那少女道：“咱们到里面说话去，梦郎，我日日夜夜，就盼有此时此刻……”一面细声低语，一面握着他手，悄没声的穿过帷幕，踏着厚厚的地毯，走向内堂。\n",
      "\n",
      "\t```\n",
      "\t我的问题是：虚竹的女朋友是谁。\n",
      "    \n",
      "question: 虚竹的女朋友是谁\n",
      "answer: 虚竹的女朋友是梦姑。\n"
     ]
    }
   ],
   "source": [
    "# Retrieval-augmented run: ask the question again, this time passing the\n",
    "# chunk returned by search_relevant_news() as context.\n",
    "answer = answer_question(question, context)\n",
    "# The two adjacent f-strings are concatenated into one printed message.\n",
    "print(f'question: {question}\\n' f'answer: {answer}')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
